# setwd("~/Documents/Projects/anticle_analyses/REPLICATION_CPS")
# setwd("...")
options("modelsummary_format_numeric_latex" = "plain")
library(dplyr)
library(tidyr)
library(RColorBrewer)
library(stargazer)
library(ggplot2)
library(sf)
library(ggrepel)
library(classInt)
library(scales)
library(modelsummary)
library(kableExtra)
library(mapSpain)
library(ggspatial)
source("extra/func.R")
theme_set(my_theme())

data = read.csv("data/dataset.csv")
ac_ind = read.csv("data/anticlerical_individual.csv")

## SUMMARY STATISTICS

data_desc = data %>%
  dplyr::select(
    `Anticlerical violence` = anticlerical_l,
    `Anticlerical violence (death)` = anticlerical_muerte_l,
    `Employers' association` = patr33_bin,
    `CNT union` = CNT_bin,
    `UGT union` = UGT_bin,
    `Population 1930 (log)` = pop1930_l,
    `Leftist support 1936` = izq1936,
    `Days of Republican control` = dias_control_rep_l,
    `Illiteracy` = analfab30_total,
    `Number of clerics (log)` = clerics_all_l)

`NA (%)` = function(x){ sprintf("%.1f", mean(is.na(x)) * 100) }
ss = All(data_desc) ~ Mean + SD + Min + Median + Max + `NA (%)` + N

datasummary(ss, data_desc,
  title = "Summary statistics\\label{tab:sumstats}",
  histogram = FALSE, output = "output/summary_stats.tex") %>%
kable_styling(latex_options = "hold_position") %>%
save_kable(file = "output/summary_stats.tex")

## MISSING DATA BY CCAA/PROVINCE

na_by_ccaa = data %>%
  group_by(ccaa) %>%
  summarize(
    anticlerical_l = round(sum(is.na(anticlerical_l))/n(),1),
    anticlerical_muerte_l = round(sum(is.na(anticlerical_muerte_l))/n(),1),
    patr33_bin = round(sum(is.na(patr33_bin))/n(),1),
    CNT_bin = round(sum(is.na(CNT_bin))/n(),1),
    UGT_bin = round(sum(is.na(UGT_bin))/n(),1),
    pop1930_l = round(sum(is.na(pop1930_l))/n(),1),
    izq1936 = round(sum(is.na(izq1936))/n(),1),
    dias_control_rep_l = round(sum(is.na(dias_control_rep_l))/n(),1),
    analfab30_total = round(sum(is.na(analfab30_total))/n(),1),
    clerics_all_l = round(sum(is.na(clerics_all_l))/n(),1)
  )

na_by_prov = data %>%
  group_by(prov_name) %>%
  summarize(
    anticlerical_l = round(sum(is.na(anticlerical_l))/n(),1),
    anticlerical_muerte_l = round(sum(is.na(anticlerical_muerte_l))/n(),1),
    patr33_bin = round(sum(is.na(patr33_bin))/n(),1),
    CNT_bin = round(sum(is.na(CNT_bin))/n(),1),
    UGT_bin = round(sum(is.na(UGT_bin))/n(),1),
    pop1930_l = round(sum(is.na(pop1930_l))/n(),1),
    izq1936 = round(sum(is.na(izq1936))/n(),1),
    dias_control_rep_l = round(sum(is.na(dias_control_rep_l))/n(),1),
    analfab30_total = round(sum(is.na(analfab30_total))/n(),1),
    clerics_all_l = round(sum(is.na(clerics_all_l))/n(),1)
  )
# Remove provinces with no NAs
na_by_prov = na_by_prov[!rowSums(na_by_prov %>% select(-prov_name)) == 0,]

## DESCRIPTIVES - TIME SERIES

ac_ind$date = as.Date(ac_ind$date)

p = ggplot(ac_ind, aes(x = date)) +
  geom_histogram(binwidth = 30, color = "white") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "", y = "Monthly killings")

ggsave("output/time_anticle_full.pdf", height = 4, width = 4, device = "pdf")

p = ggplot(ac_ind %>% filter(date < "1937-01-01"), aes(x = date)) +
  geom_histogram(binwidth = 7, color = "white") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(x = "", y = "Weekly killings")

ggsave("output/time_anticle_1936.pdf", height = 4, width = 4, device = "pdf")

p = ggplot(ac_ind %>% filter(date < "1937-01-01"), aes(x = date)) +
  geom_histogram(binwidth = 7, color = "white") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(x = "", y = "Weekly killings") +
  facet_grid(rows = vars(CLERO))

ggsave("output/time_anticle_1936_type.pdf", height = 5, width = 6, device = "pdf")

## MAPS

# # Get maps from mapSpain
# esp_get_can_box(style = "left", moveCAN = TRUE, epsg = "4258")
# esp_get_can_provinces(moveCAN = TRUE, epsg = "4258")
# esp_get_can_provinces(moveCAN = TRUE, epsg = "4258")

# Load shapefile
shp = st_read("extra/ESP_adm4_1930_2011.shp", "ESP_adm4_1930_2011")

# Moving the Canary Islands
crs = st_crs(shp)
shp_mainland = shp[!shp$NAME_2 %in% c("santa cruz de tenerife", "las palmas"),]
shp_ci_raw = shp[shp$NAME_2 %in% c("santa cruz de tenerife", "las palmas"),]
shp_ci = esp_move_can(shp_ci_raw, moveCAN = TRUE)
shp = rbind(shp_mainland, shp_ci)

# Add variables
shp$dias_control_rep = data$dias_control_rep[match(shp$muni_code, data$COD_INE)]
shp$anticlerical_bin = data$anticlerical_bin[match(shp$muni_code, data$COD_INE)]
shp$anticlerical_bin[is.na(shp$anticlerical_bin)] = 0
shp$anticlerical_l = data$anticlerical_l[match(shp$muni_code, data$COD_INE)]

# Get submaps for layers
anticlerical_violence = subset(shp, anticlerical_bin == 1)
anticlerical_noviolence = subset(shp, anticlerical_bin != 1)
francoist_control = subset(shp, dias_control_rep == 0)
# Get subdivisions et al
ccaa = esp_get_ccaa()
provs = esp_get_prov()
box = esp_get_can_box()
line = esp_get_can_provinces()
# Main cities
Madrid = esp_get_capimun(year=2011, region = "Madrid", munic = "^Madrid$", epsg = 4258)
Barcelona = esp_get_capimun(munic = "^Barcelona$", epsg = 4258)
Bilbao = esp_get_capimun(munic = "^Bilbao$", epsg = 4258)
Valencia = esp_get_capimun(munic = "^València$", epsg = 4258)
Sevilla = esp_get_capimun(munic = "^Sevilla$", epsg = 4258)
ciudades = rbind(Madrid, Barcelona, Bilbao, Valencia, Sevilla)
# Legend
legdtxt = c("Anticlerical violence", "Not anticlerical violence")

# Plot
map = ggplot() +
  geom_sf(data = anticlerical_violence,
    fill = "indianred", color = "indianred", size = 0.001) +
  geom_sf(data = anticlerical_noviolence,
    fill = "ghostwhite", color = "ghostwhite", size = 0.001) +
  # labs(x = "", y = "") +
  geom_sf(data = shp,  
    color = "azure3",  fill = NA,  size = 0.01, alpha= 0.01)+
  geom_sf(data = francoist_control,
    fill = "azure3", color = "azure3", size = 0.01)+
  labs(x = "", y = "") +
  geom_sf(data = provs,  
    color = "lightsteelblue3", fill = NA, size = 1.5) +
  geom_sf(data = box,  
    color = "slategray", fill = NA, size = 0.7) +
  geom_sf(data = line,  
    color = "slategray", fill = NA, size = 0.7)+
  geom_sf(data = ciudades,
    color = "azure4", fill = "azure4", alpha= 0.7, size = 2.5, shape = 20)+
  geom_sf(data = ccaa,  
    color = "slategray", fill = NA, size = 5.5)+
  geom_label_repel(data = ciudades,
    aes(label = name, geometry = geometry),
    stat = "sf_coordinates",
    size = 2.5, alpha = 0.75, box.padding = 0.37,
    fill = alpha(c("white"), 0.75)) +
  theme_minimal() +
  #annotation_north_arrow(location = "tl", which_north = "true", style = north_arrow_orienteering) +
  annotation_scale(location = "br")

# Saving
ggsave("output/map_deathsv2.pdf", device = "pdf")
ggsave("output/map_deathsv2.jpg", width = 10, dpi = 900)
